package xyz.ibenben.zhongdian.common.util;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.xmlbeans.XmlException;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.*;

/**
 * Builds and queries a Lucene full-text index over the files of a folder.
 *
 * <p>Supported file types are {@code pdf}, {@code txt}, {@code doc}, {@code docx},
 * {@code xls} and {@code xlsx}; the type is taken from the file-name extension.
 * Text is analyzed with {@link IKAnalyzer} both at index time and at query time.
 *
 * <p>Each indexed document carries these fields:
 * <ul>
 *   <li>{@code fileContent} - analyzed text, not stored</li>
 *   <li>{@code fileName} - analyzed and stored</li>
 *   <li>{@code fileSize} - doc value plus a stored copy so search results can display it</li>
 *   <li>{@code filePath} - stored only</li>
 *   <li>{@code fileNameId} - file name without extension; indexed verbatim and stored,
 *       used as the key when a file is re-indexed</li>
 * </ul>
 */
public class LuceneUtil {

    /** Index location used by the no-argument entry points. */
    private static final String DEFAULT_INDEX_DIR = "D:\\dir";
    /** Folder whose files are indexed by the no-argument entry points. */
    private static final String DEFAULT_SOURCE_DIR = "D:\\1";

    private static final String FIELD_CONTENT = "fileContent";
    private static final String FIELD_NAME = "fileName";
    private static final String FIELD_SIZE = "fileSize";
    private static final String FIELD_PATH = "filePath";
    private static final String FIELD_NAME_ID = "fileNameId";

    /**
     * Demo entry point: (re)builds the default index and searches it for a sample word.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened for searching
     */
    public static void main(String[] args) throws IOException {
        String word = "再见";
        LuceneUtil.createDirectory();
        LuceneUtil.checkWord(word);
    }

    /**
     * Indexes the default source folder into the default index location.
     *
     * @see #createDirectory(File, File)
     */
    public static void createDirectory() {
        createDirectory(new File(DEFAULT_INDEX_DIR), new File(DEFAULT_SOURCE_DIR));
    }

    /**
     * Indexes every supported file directly inside {@code sourceDir} into the index at
     * {@code indexDir}.
     *
     * <p>Sub-directories, files without an extension and files of an unsupported type are
     * skipped. A file that was indexed before is replaced, keyed by its name without
     * extension, so two files that differ only in extension replace each other.
     *
     * <p>Checked failures are printed and end the run; documents written before the
     * failure are still committed when the writer is closed.
     *
     * @param indexDir  directory holding the Lucene index (created if absent)
     * @param sourceDir directory whose files are to be indexed
     */
    public static void createDirectory(File indexDir, File sourceDir) {
        // listFiles() returns null when the path is missing or not a directory.
        File[] fileList = sourceDir.listFiles();
        if (fileList == null) {
            System.err.println("Source directory cannot be listed: " + sourceDir);
            return;
        }
        // Resources are closed in reverse order: writer (commits), directory, analyzer.
        try (Analyzer analyzer = new IKAnalyzer();
             Directory directory = FSDirectory.open(indexDir.toPath());
             IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            for (File file1 : fileList) {
                Document document = buildDocument(file1);
                if (document == null) {
                    continue;
                }
                // The key field is indexed, so this really deletes the previous version
                // of the same file before adding the new one.
                Term term = new Term(FIELD_NAME_ID, document.get(FIELD_NAME_ID));
                indexWriter.updateDocument(term, document);
            }
        } catch (IOException | OpenXML4JException | XmlException e) {
            e.printStackTrace();
        }
    }

    /**
     * Searches the default index for {@code word} and prints the matches.
     *
     * @param word query text; it is handed to the query parser as-is
     * @throws IOException if the index cannot be opened or read
     * @see #checkWord(File, String)
     */
    public static void checkWord(String word) throws IOException {
        checkWord(new File(DEFAULT_INDEX_DIR), word);
    }

    /**
     * Searches the {@code fileContent} field of the index at {@code indexDir} and prints
     * the total hit count followed by name, size and path of up to ten matches.
     *
     * <p>A query that cannot be parsed is reported on standard error and produces no
     * results.
     *
     * @param indexDir directory holding the Lucene index
     * @param word     query text; it is handed to the query parser as-is, so query-syntax
     *                 characters in it are interpreted by the parser
     * @throws IOException if the index cannot be opened or read
     */
    public static void checkWord(File indexDir, String word) throws IOException {
        try (Analyzer analyzer = new IKAnalyzer();
             Directory directory = FSDirectory.open(indexDir.toPath());
             IndexReader indexReader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            // First argument is the default field, second the analyzer used on the query.
            QueryParser queryParser = new QueryParser(FIELD_CONTENT, analyzer);
            // The field is named explicitly instead of relying on the parser default.
            Query query = queryParser.parse(FIELD_CONTENT + ":" + word);
            // Second argument caps the number of returned hits.
            TopDocs topDocs = indexSearcher.search(query, 10);

            System.out.println("查询结果的总条数：" + topDocs.totalHits);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // scoreDoc.doc is the internal document id; load its stored fields.
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.println(document.get(FIELD_NAME));
                System.out.println(document.get(FIELD_SIZE));
                System.out.println(document.get(FIELD_PATH));
                System.out.println("----------------------------------");
            }
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the text of an {@code .xls} cell.
     *
     * @param cell the cell, may be {@code null}
     * @return the numeric value formatted with {@link String#valueOf(double)}, the string
     *         value, or an empty string for {@code null} cells and any other cell type
     */
    public static String getHSSFCellText(HSSFCell cell) {
        if (cell != null) {
            if (cell.getCellType() == HSSFCell.CELL_TYPE_NUMERIC) {
                return String.valueOf(cell.getNumericCellValue());
            } else if (cell.getCellType() == HSSFCell.CELL_TYPE_STRING) {
                return cell.getStringCellValue();
            }
        }
        return "";
    }

    /**
     * Returns the text of an {@code .xlsx} cell.
     *
     * @param cell the cell, may be {@code null}
     * @return the numeric value formatted with {@link String#valueOf(double)}, the string
     *         value, or an empty string for {@code null} cells and any other cell type
     */
    public static String getXSSFCellText(XSSFCell cell) {
        if (cell != null) {
            if (cell.getCellType() == XSSFCell.CELL_TYPE_NUMERIC) {
                return String.valueOf(cell.getNumericCellValue());
            } else if (cell.getCellType() == XSSFCell.CELL_TYPE_STRING) {
                return cell.getStringCellValue();
            }
        }
        return "";
    }

    /**
     * Builds the Lucene document for one file.
     *
     * @return the document, or {@code null} when the entry is not a regular file, has no
     *         extension, or is of an unsupported type
     */
    private static Document buildDocument(File file)
            throws IOException, OpenXML4JException, XmlException {
        if (!file.isFile()) {
            // Opening a stream on a directory would fail and abort the whole run.
            return null;
        }
        String fileName = file.getName();
        int dot = fileName.lastIndexOf('.');
        if (dot < 0) {
            // No extension: the type is unknown and there is no base name to cut out.
            return null;
        }
        String fileType = fileName.substring(dot + 1).toLowerCase();
        String fileNameId = fileName.substring(0, dot);

        Document document = new Document();
        // A name ending in "." has an empty type: it is indexed by name only.
        if (!fileType.isEmpty()) {
            String content = extractContent(file, fileType);
            if (content == null) {
                return null;
            }
            document.add(new TextField(FIELD_CONTENT, content, Field.Store.NO));
        }

        long fileSize = FileUtils.sizeOf(file);
        document.add(new TextField(FIELD_NAME, fileName, Field.Store.YES));
        document.add(new NumericDocValuesField(FIELD_SIZE, fileSize));
        // Doc values are not returned with search hits; keep a stored copy for display.
        document.add(new StoredField(FIELD_SIZE, fileSize));
        document.add(new StoredField(FIELD_PATH, file.getPath()));
        // Indexed as a single un-analyzed token so it can serve as the update key.
        document.add(new StringField(FIELD_NAME_ID, fileNameId, Field.Store.YES));
        return document;
    }

    /**
     * Extracts the plain text of {@code file} according to its lower-case extension.
     *
     * @return the extracted text, or {@code null} if the type is not supported
     */
    private static String extractContent(File file, String fileType)
            throws IOException, OpenXML4JException, XmlException {
        switch (fileType) {
            case "pdf":
                return readPdf(file);
            case "txt":
                return readTxt(file);
            case "doc":
                return readDoc(file);
            case "docx":
                return readDocx(file);
            case "xls":
                return readXls(file);
            case "xlsx":
                return readXlsx(file);
            default:
                return null;
        }
    }

    /** Extracts the text of a PDF file. */
    private static String readPdf(File file) throws IOException {
        try (InputStream in = new FileInputStream(file)) {
            PDFParser parser = new PDFParser(in);
            parser.parse();
            PDDocument pdDocument = parser.getPDDocument();
            try {
                return new PDFTextStripper().getText(pdDocument);
            } finally {
                pdDocument.close();
            }
        }
    }

    /** Reads a text file line by line using the platform default charset. */
    private static String readTxt(File file) throws IOException {
        try (BufferedReader br =
                     new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
            StringBuilder txtFile = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null) {
                // Keep a separator so the last token of one line does not fuse with the
                // first token of the next.
                txtFile.append(line).append('\n');
            }
            return txtFile.toString();
        }
    }

    /** Extracts the text of a binary Word ({@code .doc}) file. */
    private static String readDoc(File file) throws IOException {
        try (InputStream in = new FileInputStream(file)) {
            return new WordExtractor(in).getText();
        }
    }

    /** Extracts the text of an OOXML Word ({@code .docx}) file. */
    private static String readDocx(File file)
            throws IOException, OpenXML4JException, XmlException {
        OPCPackage opcPackage = POIXMLDocument.openPackage(file.getAbsolutePath());
        try {
            POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
            return extractor.getText();
        } finally {
            // Release the file handle without writing anything back to the source file.
            opcPackage.revert();
        }
    }

    /** Joins the text of all cells of all sheets of an {@code .xls} workbook with commas. */
    private static String readXls(File file) throws IOException {
        try (InputStream in = new FileInputStream(file)) {
            HSSFWorkbook workbook = new HSSFWorkbook(in);
            StringBuilder buffer = new StringBuilder();
            for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
                HSSFSheet sheet = workbook.getSheetAt(i);
                // Iterate up to the last used index: the "physical" counts ignore gaps
                // and would cut off trailing rows/cells in sparse sheets.
                for (int j = 0; j <= sheet.getLastRowNum(); j++) {
                    HSSFRow row = sheet.getRow(j);
                    if (row == null) {
                        continue;
                    }
                    for (int k = 0; k < row.getLastCellNum(); k++) {
                        buffer.append(getHSSFCellText(row.getCell(k))).append(",");
                    }
                }
            }
            return buffer.toString();
        }
    }

    /** Joins the text of all cells of all sheets of an {@code .xlsx} workbook with commas. */
    private static String readXlsx(File file) throws IOException {
        try (InputStream in = new FileInputStream(file)) {
            XSSFWorkbook workbook = new XSSFWorkbook(in);
            StringBuilder buffer = new StringBuilder();
            for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
                XSSFSheet sheet = workbook.getSheetAt(i);
                // Iterate up to the last used index: the "physical" counts ignore gaps
                // and would cut off trailing rows/cells in sparse sheets.
                for (int j = 0; j <= sheet.getLastRowNum(); j++) {
                    XSSFRow row = sheet.getRow(j);
                    if (row == null) {
                        continue;
                    }
                    for (int k = 0; k < row.getLastCellNum(); k++) {
                        buffer.append(getXSSFCellText(row.getCell(k))).append(",");
                    }
                }
            }
            return buffer.toString();
        }
    }
}
